# Chunk defaults for the report: echo the source of every chunk and cache
# chunk results between renders.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
# Git setup / verification
# Open the project repository once and use the same handle for both the commit
# tag and the status check, so the two can never refer to different
# repositories (previously status() was resolved from the working directory).
git_repo <- git2r::repository(here::here())
# First 8 characters of the SHA that HEAD points to.
git_tag <- stringr::str_sub(git2r::revparse_single(git_repo, "HEAD")$sha, 1, 8)
# Staged and unstaged changes only; untracked files are not reported.
git_status <- git2r::status(git_repo, untracked = FALSE)
# In a non-interactive run, abort if the work tree has staged or unstaged
# changes. Interactive sessions are allowed to proceed.
if (!interactive() &&
    (length(git_status$staged) != 0 || length(git_status$unstaged) != 0)) {
  stop("There are uncommited changes to the repository")
}
# Load Libraries
suppressPackageStartupMessages({
library(tidyverse)
library(glue)
library(stringr)
library(signeR)
library(SomaticSignatures)
library(plotly)
library(cowplot)
})
# Utility Functions
# Build a theme object in which selected theme elements have been transformed.
#
# Arguments:
#   plot          - object whose `$theme` list holds any plot-specific settings.
#   ...           - named functions; each name is a theme element and each
#                   function maps that element's current value to a new one.
#   default_theme - theme consulted when `plot$theme` has no entry for an
#                   element (defaults to the active theme, `theme_get()`).
#
# Returns the result of `theme()` called with the transformed elements; the
# plot itself is not modified.
theme_modify <- function(plot, ..., default_theme = theme_get()) {
  modifiers <- list(...)

  # Current value of one element: the plot's own setting when present,
  # otherwise the value from the default theme.
  current_setting <- function(element_name) {
    from_plot <- plot$theme[[element_name]]
    if (is.null(from_plot)) {
      return(default_theme[[element_name]])
    }
    from_plot
  }

  new_settings <- list()
  for (element_name in names(modifiers)) {
    modify <- modifiers[[element_name]]
    new_settings[[element_name]] <- modify(current_setting(element_name))
  }

  do.call(theme, new_settings)
}
We’re loading the sample description matrix from the provided Excel spreadsheet. We parse the sample names for a short sample_id and the Genotype field for two variables: Tsg (true or false) and Fhit (+/+ or -/-).
# Read the sample sheet, keep only rows that have a genotype, and derive:
#   Tsg       - TRUE when the genotype string contains "Tsg"
#   Fhit      - "+/+" or "-/-" depending on which literal the genotype
#               contains (NA when it contains neither)
#   sample_id - the abbreviated sample name with every space removed
sample_info <-
  readxl::read_excel("Data/Sample information.xlsx") %>%
  filter(!is.na(Genotype)) %>%
  mutate(
    Tsg = str_detect(Genotype, "Tsg"),
    Fhit = case_when(
      str_detect(Genotype, stringr::fixed("Fhit +/+")) ~ "+/+",
      str_detect(Genotype, stringr::fixed("Fhit -/-")) ~ "-/-"
    ),
    sample_id = str_replace_all(`Abreviated Sample name`, " ", "")
  )
# Print the resulting table in the report.
sample_info
| Abreviated Sample name | Genotype | Tissue | seq library type | Tsg | Fhit | sample_id |
|---|---|---|---|---|---|---|
| CT2-1F | Fhit -/- | liver | non-PCR amplified | FALSE | -/- | CT2-1F |
| CT2-3F | Tsg;Fhit -/- | liver | non-PCR amplified | TRUE | -/- | CT2-3F |
| CT3-1M | Fhit -/- | liver | non-PCR amplified | FALSE | -/- | CT3-1M |
| CT3- 3M | Tsg;Fhit +/+ | liver | non-PCR amplified | TRUE | +/+ | CT3-3M |
| CT4-2F | Tsg;Fhit -/- | liver | non-PCR amplified | TRUE | -/- | CT4-2F |
| CT4-3F | Fhit +/+ | liver | non-PCR amplified | FALSE | +/+ | CT4-3F |
| CT9-1F | Tsg;Fhit +/+ | liver | non-PCR amplified | TRUE | +/+ | CT9-1F |
| CT11-2F | Fhit +/+ | liver | non-PCR amplified | FALSE | +/+ | CT11-2F |
| CT15-1M | Fhit +/+ | liver | non-PCR amplified | FALSE | +/+ | CT15-1M |
| CT17-1M | Tsg;Fhit -/- | liver | non-PCR amplified | TRUE | -/- | CT17-1M |
| CT18-2F | Fhit -/- | liver | non-PCR amplified | FALSE | -/- | CT18-2F |
| CT20-2M | Fhit -/- | liver | non-PCR amplified | FALSE | -/- | CT20-2M |
| CT22-3M | Tsg;Fhit +/+ | liver | non-PCR amplified | TRUE | +/+ | CT22-3M |
| CT23-2F | Tsg;Fhit +/+ | liver | non-PCR amplified | TRUE | +/+ | CT23-2F |
| CT24-1M | Tsg;Fhit -/- | liver | non-PCR amplified | TRUE | -/- | CT24-1M |
| K116 | Fhit +/+ | sperm | PCR amplified | FALSE | +/+ | K116 |
| K124 | Fhit -/- | sperm | PCR amplified | FALSE | -/- | K124 |
| KB15 | Fhit -/- | sperm | PCR amplified | FALSE | -/- | KB15 |
The complete reference assembly for these files was downloaded from https://www.ncbi.nlm.nih.gov/assembly/GCF_000001635.26/ as a “Genomic FASTA (.fna)” file.
# Handle to the downloaded genomic FASTA file; passed to mutationContext()
# further down as the reference sequence.
reference_assembly <- FaFile("Data/GCF_000001635.26_GRCm38.p6_genomic.fna")
As a preliminary analysis I’m loading the sample VCF files (liver and sperm). These are matched to the sample metadata based on the short sample ids (e.g. CT4-2F) after fixing one inconsistency in naming (“CT23_2F” vs “CT23-2F”).
# VRanges object containing data from vcf files
vr <- local({
  # list vcf files
  vcf_paths <- fs::dir_ls("Data", glob = "*.vcf.gz")
  # purrr::keep(str_detect, "liver")  # (disabled) keep only the liver samples

  # this file errors on gzipped version, so point at the uncompressed copy
  vcf_paths <- str_replace(vcf_paths,
                           "KB15Fhit_1.filter.snp.vcf.gz$",
                           "KB15Fhit_1.filter.snp.vcf")

  # read vcf files into VRanges objects, echoing each path as it is read
  per_file_ranges <- map(vcf_paths, function(vcf_path) {
    message(vcf_path)
    readVcfAsVRanges(vcf_path)
  })

  # merge VRanges objects
  do.call(c, unname(per_file_ranges))
})
## Data/CT11-2F_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT15-1M_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT17-1M_Tsg_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT18-2F_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT2-1F_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT2-3F_Tsg_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT20-2M_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT22-3M_Tsg_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT23_2F_Tsg_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT24-1M_Tsg_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT3-1M_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT3-3M_Tsg_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT4-2F_Tsg_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT4-3F_Fhit_liverA_1.filter.snp.vcf.gz
## Data/CT9-1F_Tsg_Fhit_liverA_1.filter.snp.vcf.gz
## Data/K116Fhit_1.filter.snp.vcf.gz
## Data/K124Fhit_1.filter.snp.vcf.gz
## Data/KB15Fhit_1.filter.snp.vcf
We keep only those SNVs for which all filters passed. This includes 2 filters: “Low quality” and “QD < 2.0 || FS > 60.0 || MQ <40.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0”
# A call passes only when every column of its soft filter row is TRUE.
PASS <- apply(softFilterMatrix(vr), MARGIN = 1, FUN = all)
# Keep the passing calls and report how many survived.
vr <- vr[PASS]
message(glue("{sum(PASS)} of {length(PASS)} ({sprintf('%.2f', 100 * sum(PASS)/length(PASS))}%) SNVs across all files passed filters."))
## 1063834 of 1813765 (58.65%) SNVs across all files passed filters.
# Sample info replicated to match the rows in `vr`
vr_metadata <- local({
  # extract sampleNames (one per call in `vr`)
  call_ids <- as.vector(sampleNames(vr))
  # fix inconsistently named vcf file name
  call_ids <- str_replace(call_ids, stringr::fixed("CT23_2F"), "CT23-2F")
  # extract sample_id from names: everything before the first underscore
  call_ids <- str_extract(call_ids, "^[^_]+")
  # sperm samples have 'Fhit' appended in file names
  call_ids <- str_replace(call_ids, "Fhit$", "")
  # combine with sample info
  left_join(tibble(sample_id = call_ids), sample_info, by = "sample_id")
})

# add metadata columns to `vr`
vr$sample_id <- vr_metadata$sample_id
vr$Tissue    <- vr_metadata$Tissue
vr$Genotype  <- vr_metadata$Genotype
vr$Fhit      <- vr_metadata$Fhit
vr$Tsg       <- vr_metadata$Tsg

# replace sample names with `sample_id`
sampleNames(vr) <- vr$sample_id
The mutation contexts are extracted using the SomaticSignatures::mutationContext function and supplying it with the reference assembly loaded above. This assembly has chromosomes split into smaller contigs which match the contig identifiers in the VCF files. There is no need to translate to chromosome numbers.
# add mutation context information to `vr`
vr <- mutationContext(vr, reference_assembly)

# Count number of samples each location + alteration combination appears in.
# Result: one row per (seqnames, start, alteration, context) with
#   data        - nested tibble of the calls (sample_id, Tissue, Genotype)
#   count       - number of calls for that SNV
#   count_liver - number of those calls whose Tissue is "liver"
snvs <-
  vr %>%
  as_tibble() %>%
  mutate(length = abs(end - start) + 1) %>%
  # every record must span exactly one base
  {assertthat::assert_that(all(.$length == 1)); .} %>%
  # Select the grouping keys explicitly instead of relying on dplyr to re-add
  # them (which emitted "Adding missing grouping variables").
  select(seqnames, start, alteration, context, sample_id, Tissue, Genotype) %>%
  group_by(seqnames, start, alteration, context) %>%
  tidyr::nest() %>%
  # Ungroup before counting: a grouped mutate would evaluate the expressions
  # once per SNV instead of once over the whole list-column.
  ungroup() %>%
  mutate(
    count = map_int(data, nrow),
    # NA tissues are not counted as liver, as with the former filter() + nrow()
    count_liver = map_int(data, ~ sum(.x$Tissue == "liver", na.rm = TRUE))
  )
## Adding missing grouping variables: `seqnames`, `start`, `alteration`, `context`
# Interactive histogram of `count`, i.e. how many calls each distinct SNV has.
ggplotly(
  ggplot(snvs, aes(x = count, group = count)) +
    geom_histogram(binwidth = 1) +
    ggtitle("Number of Samples Each SNV Occurs in")
)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Interactive tile plot: for every (liver count, sperm count) combination,
# the number of distinct SNVs having exactly that combination.
ggplotly(
  snvs %>%
    # calls that are not liver calls are attributed to sperm
    mutate(count_sperm = count - count_liver) %>%
    count(count_liver, count_sperm, name = "n_snvs") %>%
    ggplot(aes(x = count_liver, y = count_sperm, fill = n_snvs)) +
    geom_tile() +
    scale_fill_viridis_c(option = "B") +
    labs(
      title = "Overlap between common SNVs in Liver and Sperm",
      x = "Liver Samples Containing SNV",
      y = "Sperm Samples Containing SNV",
      fill = "Unique SNVs"
    )
)
Most SNVs were unique to one sample or present in both sperm and liver samples.
# Number of rows in the sample sheet, used as the denominator below.
n_samples <- nrow(sample_info)
# SNVs called in more than half of the samples are labelled "common"; each
# gets an id of the form `{seqnames}_{start}_{alteration}`.
common_snvs <-
  snvs %>%
  filter(count > n_samples / 2) %>%
  transmute(
    seqnames,
    start,
    alteration,
    snv_id = glue("{seqnames}_{start}_{alteration}")
  )
message(glue("{nrow(common_snvs)} of {nrow(snvs)} ({sprintf('%.2f', 100 * nrow(common_snvs) / nrow(snvs))}%) SNVs classified as 'common' due to occurance in >50% of samples."))
## 50284 of 156235 (32.18%) SNVs classified as 'common' due to occurance in >50% of samples.
# Give every call in `vr` an id built the same way as `common_snvs$snv_id`.
vr$snv_id <- with(as_tibble(vr), glue("{seqnames}_{start}_{alteration}"))
# Flag calls that belong to a common SNV, drop them, and report the tally.
COMMON_SNV <- vr$snv_id %in% common_snvs$snv_id
vr <- vr[!COMMON_SNV]
message(glue("{sum(COMMON_SNV)} of {length(COMMON_SNV)} ({sprintf('%.2f', 100 * sum(COMMON_SNV) / length(COMMON_SNV))}%) SNV calls removed as 'common'."))
## 793511 of 1063834 (74.59%) SNV calls removed as 'common'.
# One row per call (the nested `data` is expanded again), annotated with:
#   motif     - "<from>><to>:<left><from><right>", the format signeR uses
#   snv_id    - `{seqnames}_{start}_{alteration}`
#   is_common - whether the SNV is listed in `common_snvs`
snv_calls <-
  snvs %>%
  # split the alteration into reference and alternate base
  tidyr::extract(col = "alteration",
                 into = c(".from", ".to"),
                 regex = "([TCGA])([TCGA])",
                 remove = FALSE) %>%
  # split the context into its left and right flanking base (N allowed)
  tidyr::extract(col = "context",
                 into = c(".l_context", ".r_context"),
                 regex = "([TCGAN])\\.([TCGAN])",
                 remove = FALSE) %>%
  mutate(
    motif = glue("{.from}>{.to}:{.l_context}{.from}{.r_context}"),
    snv_id = glue("{seqnames}_{start}_{alteration}"),
    is_common = snv_id %in% common_snvs$snv_id
  ) %>%
  # the dot-prefixed helper columns were only needed to build `motif`
  select(-starts_with(".")) %>%
  tidyr::unnest(data)
# Interactive bar chart of call counts per context, faceted by alteration
# (columns) and by common / not common (rows). Calls whose context contains
# an N are left out.
ggplotly(
  snv_calls %>%
    filter(!str_detect(context, "N")) %>%
    count(alteration, context, is_common) %>%
    mutate(
      is_common = if_else(is_common, "Common", "Not Common"),
      # "CA" -> "C>A"
      alteration = str_replace(alteration, "^(.)(.)$", "\\1>\\2")
    ) %>%
    ggplot(aes(x = context, y = n)) +
    geom_bar(stat = "identity") +
    facet_grid(is_common ~ alteration) +
    theme(axis.text.x = element_text(angle = 90)) +
    labs(title = "Motif Frequencies in Common and Non Common SNVs")
)
# PCA (2 components) of motif frequencies. Rows of the input matrix are the
# individual samples plus one pseudo-sample, "Common SNVs", that pools every
# call belonging to a common SNV; columns are motifs.
pca_result <-
  snv_calls %>%
  filter(!str_detect(context, "N")) %>%
  mutate(group_id =
           case_when(
             is_common ~ "Common SNVs",
             TRUE ~ sample_id
           )) %>%
  group_by(group_id, motif) %>%
  summarise(n = n(), .groups = "drop") %>%
  # normalize by sample: counts become within-group frequencies
  group_by(group_id) %>%
  mutate(n = n / sum(n)) %>%
  ungroup() %>%
  # A motif that never occurs in a group has frequency 0, not a missing value;
  # fill explicitly so the PCA is not handed NA entries.
  pivot_wider(names_from = group_id,
              values_from = n,
              values_fill = list(n = 0)) %>%
  column_to_rownames("motif") %>%
  # transpose so that groups are rows and motifs are columns
  as.matrix() %>% t() %>%
  pcaMethods::pca(nPcs = 2, center = TRUE, scale = "vector")
# Interactive scatter plot of the PCA scores, coloured by tissue (or
# "Common SNVs" for the pooled pseudo-sample). The `text` aesthetic carries
# the sample id and genotype for each point.
ggplotly(
  pca_result@scores %>%
    as.data.frame() %>%
    rownames_to_column("sample_id") %>%
    left_join(sample_info, by = "sample_id") %>%
    mutate(group = case_when(
      str_detect(sample_id, "Common") ~ "Common SNVs",
      TRUE ~ Tissue
    )) %>%
    ggplot(aes(x = PC1, y = PC2, color = group)) +
    geom_point(aes(text = glue("sample_id = {sample_id}",
                               "genotype = {Genotype}",
                               .sep = "\n")))
)
## Warning: Ignoring unknown aesthetics: text
This plot shows a PCA of the filtered per-sample motif frequencies compared to a single pseudo-sample comprising all of the SNV calls for “common” SNVs combined.
# Interactive scatter plot of the PCA loadings, one point per motif, coloured
# by the alteration part of the motif (e.g. "C>A").
ggplotly(
  pca_result@loadings %>%
    as.data.frame() %>%
    rownames_to_column("motif") %>%
    # split "C>A:ACA" into alteration "C>A" and context "ACA"
    tidyr::extract(col = "motif",
                   into = c("alteration", "context"),
                   regex = "([TCGA]>[TCGA]):([TCGA][TCGA][TCGA])",
                   remove = FALSE) %>%
    ggplot(aes(x = PC1, y = PC2, color = alteration)) +
    geom_point(aes(text = glue("motif = {motif}", .sep = "\n"))) +
    labs(title = "Loadings Plot of PCA results")
)
## Warning: Ignoring unknown aesthetics: text
No clear pattern is seen in which motifs distinguish the groups.
# Extract motif *count* matrix
# `motif_counts`: one row per (sample_id, motif) with N = number of calls in
# `vr` (common SNVs already removed) having that motif.
motif_counts <-
  tibble(sample_id = vr$sample_id,
         alteration = as.character(vr$alteration),
         context = as.character(vr$context)) %>%
  # create `motif` column in the format signeR uses
  tidyr::extract(col = "alteration",
                 regex = "([TCGA])([TCGA])",
                 into = c(".from", ".to"),
                 remove = FALSE) %>%
  tidyr::extract(col = "context",
                 regex = "([TCGA])\\.([TCGA])",
                 into = c(".l_context", ".r_context"),
                 remove = FALSE) %>%
  # Rows whose alteration or context did not match (e.g. an N in the context)
  # come back as NA; glue() would render those as the text "NA" and create
  # bogus motif rows, so drop them before building the motif.
  filter(!is.na(.from), !is.na(.to),
         !is.na(.l_context), !is.na(.r_context)) %>%
  mutate(motif = glue("{.from}>{.to}:{.l_context}{.from}{.r_context}")) %>%
  # count motif occurrences by sample
  group_by(sample_id, motif, alteration, context) %>%
  summarise(N = n(), .groups = "drop")
# convert to matrix: motifs in rows, samples in columns
motif_count_matrix <-
  motif_counts %>%
  # a motif never observed in a sample is a count of 0, not a missing value
  pivot_wider(id_cols = motif,
              names_from = sample_id,
              values_from = N,
              values_fill = list(N = 0L)) %>%
  column_to_rownames("motif") %>%
  as.matrix()
The motif matrix is then extracted for further analysis. This matrix containing frequencies of each mutation motif for each sample is provided as a *.csv file accompanying this report.
# matrix of sample x motif frequencies
# Write the matrix to CSV with the motif (row name) as the first column,
# followed by one column per sample.
motif_count_matrix %>%
  as_tibble(rownames = "motif") %>%
  write_csv("./Results/motif_count_matrix.csv")
# Show the same matrix in the report, formatted with 3 significant digits.
as.data.frame(format(motif_count_matrix, digits = 3))
| CT11-2F | CT15-1M | CT17-1M | CT18-2F | CT2-1F | CT2-3F | CT20-2M | CT22-3M | CT23-2F | CT24-1M | CT3-1M | CT3-3M | CT4-2F | CT4-3F | CT9-1F | K116 | K124 | KB15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| C>A:ACA | 209 | 197 | 141 | 156 | 177 | 163 | 193 | 180 | 197 | 167 | 188 | 211 | 151 | 191 | 211 | 171 | 178 | 168 |
| C>A:ACC | 116 | 109 | 118 | 112 | 115 | 107 | 126 | 103 | 117 | 118 | 118 | 127 | 94 | 117 | 116 | 158 | 131 | 147 |
| C>A:ACG | 49 | 54 | 39 | 48 | 32 | 43 | 52 | 53 | 51 | 35 | 40 | 52 | 31 | 41 | 51 | 44 | 54 | 55 |
| C>A:ACT | 117 | 96 | 94 | 87 | 102 | 117 | 106 | 99 | 103 | 86 | 89 | 99 | 95 | 88 | 103 | 94 | 94 | 98 |
| C>A:CCA | 140 | 119 | 121 | 98 | 119 | 124 | 130 | 126 | 145 | 112 | 119 | 145 | 97 | 128 | 137 | 115 | 116 | 128 |
| C>A:CCC | 161 | 166 | 149 | 159 | 141 | 155 | 154 | 168 | 161 | 160 | 151 | 177 | 169 | 175 | 194 | 137 | 176 | 148 |
| C>A:CCG | 38 | 39 | 29 | 32 | 35 | 35 | 38 | 44 | 31 | 37 | 35 | 35 | 31 | 31 | 39 | 34 | 30 | 28 |
| C>A:CCT | 113 | 106 | 79 | 89 | 88 | 111 | 105 | 104 | 115 | 95 | 93 | 107 | 80 | 94 | 109 | 111 | 116 | 99 |
| C>A:GCA | 80 | 79 | 85 | 68 | 77 | 68 | 85 | 79 | 82 | 74 | 65 | 87 | 65 | 76 | 90 | 84 | 84 | 94 |
| C>A:GCC | 59 | 55 | 43 | 30 | 44 | 48 | 49 | 47 | 54 | 52 | 52 | 58 | 39 | 48 | 47 | 46 | 55 | 54 |
| C>A:GCG | 33 | 26 | 39 | 26 | 24 | 34 | 28 | 42 | 34 | 34 | 37 | 43 | 27 | 40 | 49 | 32 | 34 | 29 |
| C>A:GCT | 94 | 98 | 78 | 75 | 65 | 75 | 75 | 86 | 93 | 76 | 77 | 99 | 72 | 83 | 99 | 67 | 66 | 68 |
| C>A:TCA | 96 | 79 | 65 | 73 | 72 | 75 | 74 | 86 | 88 | 70 | 60 | 96 | 89 | 83 | 101 | 74 | 77 | 60 |
| C>A:TCC | 108 | 94 | 63 | 74 | 85 | 91 | 76 | 90 | 91 | 77 | 82 | 92 | 69 | 88 | 110 | 103 | 114 | 112 |
| C>A:TCG | 26 | 22 | 23 | 22 | 24 | 21 | 29 | 22 | 23 | 25 | 20 | 26 | 18 | 19 | 24 | 29 | 15 | 22 |
| C>A:TCT | 332 | 329 | 323 | 284 | 267 | 302 | 343 | 324 | 317 | 263 | 331 | 386 | 290 | 307 | 410 | 175 | 154 | 168 |
| C>G:ACA | 219 | 216 | 187 | 179 | 176 | 227 | 219 | 195 | 208 | 217 | 196 | 220 | 180 | 218 | 226 | 229 | 180 | 195 |
| C>G:ACC | 62 | 56 | 54 | 54 | 66 | 63 | 74 | 77 | 52 | 46 | 51 | 66 | 51 | 60 | 73 | 60 | 54 | 52 |
| C>G:ACG | 54 | 57 | 57 | 48 | 63 | 39 | 49 | 49 | 64 | 58 | 46 | 48 | 55 | 56 | 52 | 85 | 82 | 73 |
| C>G:ACT | 119 | 106 | 79 | 87 | 91 | 89 | 103 | 120 | 102 | 77 | 85 | 94 | 62 | 90 | 110 | 91 | 82 | 94 |
| C>G:CCA | 50 | 54 | 43 | 37 | 49 | 43 | 55 | 50 | 47 | 48 | 49 | 54 | 34 | 48 | 46 | 55 | 39 | 46 |
| C>G:CCC | 55 | 39 | 53 | 50 | 47 | 50 | 47 | 45 | 56 | 38 | 46 | 56 | 38 | 42 | 46 | 48 | 60 | 53 |
| C>G:CCG | 38 | 29 | 31 | 28 | 31 | 30 | 23 | 28 | 24 | 30 | 36 | 36 | 25 | 27 | 27 | 52 | 33 | 43 |
| C>G:CCT | 51 | 57 | 42 | 51 | 48 | 50 | 55 | 58 | 54 | 44 | 63 | 64 | 48 | 49 | 56 | 55 | 53 | 58 |
| C>G:GCA | 117 | 106 | 86 | 107 | 68 | 66 | 97 | 133 | 105 | 86 | 113 | 118 | 137 | 112 | 109 | 119 | 139 | 148 |
| C>G:GCC | 74 | 105 | 62 | 91 | 57 | 61 | 52 | 82 | 62 | 72 | 78 | 68 | 93 | 76 | 70 | 68 | 50 | 83 |
| C>G:GCG | 217 | 244 | 183 | 277 | 157 | 165 | 187 | 206 | 182 | 136 | 238 | 258 | 345 | 287 | 196 | 794 | 843 | 805 |
| C>G:GCT | 137 | 128 | 93 | 133 | 107 | 87 | 106 | 105 | 106 | 111 | 134 | 134 | 158 | 131 | 124 | 94 | 89 | 116 |
| C>G:TCA | 52 | 60 | 47 | 52 | 47 | 53 | 52 | 63 | 56 | 46 | 56 | 64 | 49 | 72 | 66 | 74 | 62 | 69 |
| C>G:TCC | 71 | 70 | 55 | 61 | 53 | 66 | 62 | 76 | 65 | 53 | 78 | 78 | 66 | 70 | 81 | 106 | 83 | 87 |
| C>G:TCG | 41 | 42 | 37 | 44 | 29 | 36 | 40 | 42 | 32 | 45 | 44 | 40 | 37 | 34 | 40 | 53 | 43 | 55 |
| C>G:TCT | 222 | 216 | 195 | 162 | 196 | 206 | 211 | 228 | 205 | 177 | 208 | 215 | 170 | 217 | 217 | 155 | 159 | 160 |
| C>T:ACA | 487 | 445 | 393 | 398 | 392 | 445 | 448 | 420 | 452 | 354 | 446 | 463 | 379 | 410 | 451 | 407 | 389 | 449 |
| C>T:ACC | 157 | 167 | 163 | 145 | 146 | 146 | 160 | 174 | 171 | 155 | 159 | 173 | 164 | 150 | 168 | 160 | 145 | 176 |
| C>T:ACG | 200 | 203 | 154 | 164 | 195 | 181 | 204 | 193 | 191 | 135 | 165 | 187 | 161 | 174 | 221 | 164 | 155 | 173 |
| C>T:ACT | 229 | 251 | 192 | 183 | 189 | 199 | 207 | 228 | 197 | 180 | 183 | 246 | 171 | 217 | 220 | 209 | 223 | 223 |
| C>T:CCA | 225 | 236 | 181 | 170 | 187 | 216 | 229 | 239 | 206 | 183 | 202 | 230 | 187 | 206 | 230 | 210 | 186 | 202 |
| C>T:CCC | 319 | 343 | 307 | 312 | 245 | 310 | 333 | 306 | 296 | 279 | 291 | 394 | 293 | 312 | 326 | 369 | 380 | 492 |
| C>T:CCG | 193 | 186 | 146 | 150 | 182 | 171 | 186 | 174 | 168 | 144 | 144 | 190 | 130 | 151 | 172 | 149 | 170 | 144 |
| C>T:CCT | 305 | 338 | 282 | 284 | 266 | 265 | 334 | 340 | 297 | 293 | 312 | 368 | 283 | 284 | 353 | 324 | 374 | 413 |
| C>T:GCA | 160 | 145 | 159 | 140 | 147 | 154 | 153 | 183 | 173 | 134 | 126 | 165 | 133 | 127 | 166 | 134 | 138 | 135 |
| C>T:GCC | 126 | 124 | 109 | 114 | 112 | 115 | 129 | 133 | 122 | 126 | 131 | 130 | 102 | 113 | 135 | 133 | 119 | 133 |
| C>T:GCG | 177 | 164 | 118 | 153 | 151 | 163 | 177 | 172 | 168 | 147 | 159 | 157 | 116 | 162 | 177 | 149 | 133 | 151 |
| C>T:GCT | 165 | 168 | 139 | 149 | 149 | 146 | 159 | 171 | 166 | 143 | 158 | 190 | 146 | 147 | 177 | 164 | 175 | 152 |
| C>T:TCA | 208 | 228 | 163 | 148 | 181 | 174 | 191 | 217 | 218 | 182 | 177 | 241 | 164 | 182 | 222 | 190 | 190 | 185 |
| C>T:TCC | 418 | 419 | 296 | 340 | 333 | 339 | 414 | 371 | 371 | 306 | 380 | 430 | 340 | 370 | 415 | 458 | 395 | 456 |
| C>T:TCG | 132 | 153 | 113 | 128 | 125 | 127 | 137 | 142 | 156 | 116 | 115 | 145 | 113 | 121 | 155 | 108 | 105 | 95 |
| C>T:TCT | 312 | 318 | 264 | 247 | 245 | 272 | 284 | 311 | 285 | 261 | 249 | 338 | 237 | 296 | 313 | 304 | 262 | 307 |
| T>A:ATA | 138 | 136 | 115 | 115 | 137 | 117 | 125 | 133 | 131 | 112 | 116 | 168 | 97 | 129 | 145 | 134 | 113 | 113 |
| T>A:ATC | 74 | 75 | 56 | 73 | 64 | 61 | 70 | 86 | 79 | 58 | 60 | 82 | 73 | 67 | 72 | 66 | 81 | 67 |
| T>A:ATG | 123 | 108 | 72 | 87 | 85 | 82 | 101 | 116 | 109 | 87 | 89 | 127 | 76 | 97 | 108 | 101 | 82 | 96 |
| T>A:ATT | 129 | 155 | 110 | 94 | 111 | 114 | 111 | 138 | 134 | 122 | 116 | 145 | 110 | 108 | 135 | 117 | 97 | 106 |
| T>A:CTA | 71 | 78 | 53 | 48 | 63 | 52 | 76 | 84 | 67 | 59 | 59 | 69 | 52 | 54 | 66 | 41 | 59 | 58 |
| T>A:CTC | 153 | 157 | 127 | 149 | 130 | 154 | 148 | 133 | 153 | 131 | 133 | 161 | 138 | 139 | 147 | 123 | 130 | 116 |
| T>A:CTG | 93 | 101 | 89 | 79 | 87 | 95 | 88 | 92 | 98 | 79 | 81 | 101 | 87 | 79 | 107 | 92 | 89 | 92 |
| T>A:CTT | 100 | 110 | 84 | 83 | 71 | 81 | 88 | 103 | 86 | 101 | 73 | 107 | 82 | 86 | 124 | 83 | 83 | 97 |
| T>A:GTA | 67 | 85 | 48 | 59 | 53 | 49 | 61 | 72 | 71 | 57 | 49 | 86 | 49 | 67 | 75 | 54 | 58 | 70 |
| T>A:GTC | 43 | 45 | 37 | 39 | 49 | 51 | 42 | 46 | 40 | 38 | 44 | 58 | 34 | 42 | 42 | 50 | 45 | 53 |
| T>A:GTG | 173 | 153 | 148 | 149 | 139 | 155 | 154 | 173 | 167 | 141 | 155 | 181 | 155 | 159 | 185 | 144 | 131 | 145 |
| T>A:GTT | 82 | 72 | 64 | 71 | 69 | 80 | 84 | 80 | 76 | 62 | 72 | 98 | 69 | 69 | 91 | 67 | 68 | 71 |
| T>A:TTA | 113 | 131 | 118 | 86 | 106 | 91 | 123 | 124 | 121 | 95 | 96 | 114 | 90 | 79 | 106 | 104 | 106 | 103 |
| T>A:TTC | 59 | 67 | 61 | 63 | 57 | 74 | 65 | 71 | 67 | 55 | 59 | 75 | 51 | 67 | 66 | 67 | 65 | 63 |
| T>A:TTG | 56 | 64 | 63 | 59 | 64 | 47 | 70 | 89 | 65 | 64 | 54 | 63 | 62 | 53 | 82 | 62 | 57 | 64 |
| T>A:TTT | 152 | 147 | 136 | 127 | 118 | 141 | 147 | 176 | 156 | 125 | 139 | 174 | 116 | 151 | 165 | 122 | 113 | 120 |
| T>C:ATA | 367 | 341 | 301 | 280 | 284 | 318 | 292 | 351 | 322 | 277 | 296 | 339 | 339 | 315 | 346 | 313 | 266 | 284 |
| T>C:ATC | 179 | 163 | 174 | 164 | 127 | 133 | 171 | 175 | 160 | 164 | 154 | 206 | 175 | 177 | 190 | 195 | 194 | 228 |
| T>C:ATG | 202 | 196 | 178 | 165 | 154 | 169 | 173 | 208 | 218 | 168 | 151 | 220 | 147 | 186 | 213 | 156 | 153 | 150 |
| T>C:ATT | 211 | 222 | 150 | 142 | 174 | 158 | 181 | 212 | 190 | 156 | 166 | 222 | 167 | 186 | 211 | 168 | 158 | 181 |
| T>C:CTA | 169 | 162 | 156 | 162 | 150 | 147 | 177 | 173 | 177 | 149 | 178 | 188 | 184 | 170 | 191 | 162 | 158 | 170 |
| T>C:CTC | 855 | 837 | 794 | 982 | 526 | 732 | 876 | 768 | 650 | 616 | 949 | 1098 | 1194 | 956 | 861 | 1193 | 1290 | 1325 |
| T>C:CTG | 197 | 212 | 213 | 176 | 171 | 165 | 206 | 204 | 183 | 151 | 213 | 218 | 208 | 202 | 196 | 205 | 186 | 175 |
| T>C:CTT | 322 | 384 | 342 | 397 | 293 | 346 | 406 | 349 | 344 | 304 | 398 | 450 | 406 | 405 | 385 | 485 | 431 | 513 |
| T>C:GTA | 117 | 137 | 130 | 99 | 112 | 117 | 125 | 119 | 128 | 109 | 117 | 138 | 98 | 123 | 135 | 113 | 111 | 114 |
| T>C:GTC | 133 | 134 | 120 | 127 | 110 | 122 | 129 | 148 | 137 | 117 | 121 | 138 | 130 | 138 | 145 | 127 | 133 | 148 |
| T>C:GTG | 161 | 153 | 139 | 158 | 140 | 146 | 138 | 155 | 163 | 124 | 144 | 173 | 143 | 161 | 177 | 142 | 146 | 136 |
| T>C:GTT | 146 | 162 | 130 | 125 | 129 | 144 | 153 | 162 | 157 | 129 | 130 | 153 | 117 | 145 | 168 | 145 | 129 | 133 |
| T>C:TTA | 122 | 145 | 105 | 133 | 120 | 130 | 127 | 158 | 148 | 118 | 112 | 156 | 133 | 121 | 144 | 123 | 113 | 131 |
| T>C:TTC | 336 | 407 | 345 | 338 | 292 | 370 | 395 | 361 | 351 | 292 | 383 | 433 | 396 | 367 | 437 | 512 | 469 | 679 |
| T>C:TTG | 115 | 128 | 108 | 110 | 109 | 110 | 118 | 137 | 133 | 114 | 111 | 132 | 112 | 100 | 128 | 104 | 122 | 114 |
| T>C:TTT | 301 | 287 | 250 | 287 | 220 | 272 | 279 | 314 | 272 | 251 | 272 | 336 | 256 | 311 | 335 | 290 | 320 | 314 |
| T>G:ATA | 321 | 273 | 299 | 253 | 265 | 289 | 332 | 323 | 331 | 207 | 294 | 323 | 284 | 260 | 364 | 148 | 138 | 161 |
| T>G:ATC | 56 | 57 | 61 | 47 | 60 | 54 | 60 | 54 | 50 | 49 | 40 | 53 | 43 | 54 | 63 | 68 | 53 | 54 |
| T>G:ATG | 99 | 101 | 82 | 102 | 70 | 89 | 98 | 94 | 82 | 78 | 94 | 117 | 102 | 95 | 98 | 142 | 149 | 182 |
| T>G:ATT | 84 | 72 | 61 | 59 | 74 | 78 | 85 | 75 | 82 | 60 | 60 | 94 | 59 | 63 | 95 | 71 | 54 | 87 |
| T>G:CTA | 25 | 33 | 16 | 21 | 25 | 24 | 31 | 33 | 25 | 25 | 23 | 40 | 27 | 29 | 29 | 31 | 30 | 39 |
| T>G:CTC | 31 | 43 | 39 | 44 | 44 | 39 | 47 | 48 | 35 | 30 | 36 | 47 | 42 | 42 | 41 | 35 | 52 | 46 |
| T>G:CTG | 81 | 76 | 90 | 77 | 76 | 70 | 101 | 78 | 79 | 71 | 89 | 105 | 126 | 92 | 89 | 91 | 75 | 103 |
| T>G:CTT | 59 | 64 | 36 | 60 | 58 | 51 | 57 | 54 | 58 | 49 | 60 | 75 | 48 | 58 | 45 | 61 | 61 | 52 |
| T>G:GTA | 133 | 102 | 97 | 128 | 90 | 103 | 109 | 114 | 125 | 100 | 109 | 121 | 141 | 120 | 120 | 113 | 122 | 121 |
| T>G:GTC | 60 | 61 | 46 | 45 | 54 | 49 | 52 | 67 | 44 | 43 | 52 | 50 | 57 | 62 | 53 | 58 | 51 | 64 |
| T>G:GTG | 897 | 938 | 920 | 1144 | 569 | 751 | 1058 | 876 | 712 | 714 | 1039 | 1191 | 1422 | 1094 | 985 | 2799 | 3043 | 2983 |
| T>G:GTT | 202 | 182 | 140 | 192 | 140 | 134 | 155 | 170 | 170 | 126 | 189 | 176 | 203 | 185 | 157 | 155 | 141 | 157 |
| T>G:TTA | 64 | 66 | 54 | 57 | 49 | 55 | 69 | 71 | 74 | 64 | 71 | 85 | 58 | 75 | 88 | 57 | 51 | 72 |
| T>G:TTC | 90 | 82 | 75 | 63 | 79 | 67 | 77 | 91 | 86 | 59 | 71 | 82 | 85 | 72 | 75 | 85 | 80 | 79 |
| T>G:TTG | 169 | 142 | 138 | 143 | 119 | 122 | 139 | 131 | 130 | 114 | 134 | 156 | 143 | 144 | 151 | 126 | 128 | 120 |
| T>G:TTT | 224 | 253 | 142 | 195 | 210 | 178 | 175 | 240 | 196 | 202 | 209 | 177 | 207 | 196 | 169 | 144 | 140 | 149 |
This is a heatmap of the mutation frequencies by sample. Columns (samples) and rows (motifs) are clustered by similarity within this dataset.
# Column labels for the heatmap: "<sample_id> <Genotype>", ordered to match
# the columns of `motif_count_matrix`.
mm_heatmap_labels <- local({
  labelled <- mutate(sample_info, label = glue("{sample_id} {Genotype}"))
  labelled$label[match(colnames(motif_count_matrix), labelled$sample_id)]
})
# Clustered heatmap of the count matrix with the descriptive column labels;
# row (motif) labels are suppressed.
gplots::heatmap.2(
  magrittr::set_colnames(motif_count_matrix, mm_heatmap_labels),
  xlab = "Samples",
  ylab = "Mutation Motifs",
  labRow = FALSE,
  margins = c(8, 2)
)
Samples do not appear to cluster by genotype, but sperm samples do cluster separately from liver samples.
These are average motif frequencies in samples with each genotype.
# Interactive mutation spectrum of the calls in `vr`, grouped by genotype.
ggplotly(plotMutationSpectrum(vr, group = "Genotype"))
# Run signeR on the transposed count matrix (samples in rows, motifs in
# columns).
# NOTE(review): a variant that wrapped this call in callr::r() was commented
# out in the original; it is left disabled here.
signatures <- signeR(M = t(motif_count_matrix))
## Evaluating models with the number of signatures ranging from 1 to 17, please be patient.
## Evaluating 1 signatures.
## EM algorithm:
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Running Gibbs sampler for 1 signature...Done.
## Evaluating 5 signatures.
## EM algorithm:
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Running Gibbs sampler for 5 signatures...Done.
## Evaluating 9 signatures.
## EM algorithm:
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Running Gibbs sampler for 9 signatures...Done.
## Evaluating 13 signatures.
## EM algorithm:
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|======================================================================| 100%
## Running Gibbs sampler for 13 signatures...Done.
## Refining search for the number of signatures ranging from 1 to 9, please be patient.
## Evaluating 3 signatures (4 evaluations left).
## EM algorithm:
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Running Gibbs sampler for 3 signatures...Done.
## Evaluating 7 signatures (3 evaluations left).
## EM algorithm:
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Running Gibbs sampler for 7 signatures...Done.
## Refining search for the number of signatures ranging from 1 to 5, please be patient.
## Evaluating 2 signatures (2 evaluations left).
## EM algorithm:
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Running Gibbs sampler for 2 signatures...Done.
## Evaluating 4 signatures (last evaluation).
## EM algorithm:
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Running Gibbs sampler for 4 signatures...Done.
## The optimal number of signatures is 3.
## Running Gibbs sampler for 3 signatures...Done.
# },
# args = list(motif_count_matrix = motif_count_matrix))
Whenever signeR is left to decide which number of signatures is optimal, it will search for the rank Nsig that maximizes the median Bayesian Information Criterion (BIC).
# Boxplot of the BIC values obtained for each tested number of signatures.
BICboxplot(signatures)
The following plots the MCMC sampled paths for each entry of the signature matrix (P) and their exposures (E). Only post-burnin paths are available for plotting. Those plots are useful for checking if entries have leveled off, reflecting the sampler convergence.
# Trace the post-burn-in MCMC samples of the signature (P) and exposure (E)
# entries; flat paths indicate the sampler has converged.
Paths(signatures$SignExposures)
Each row shows the entries and exposures of one signature along sampler iterations.
# Signature barplot with error bars at the 0.05, 0.25, 0.75 and 0.95
# sample percentiles of each entry.
SignPlot(signatures$SignExposures)
Signatures barplot with error bars reflecting the sample percentiles 0.05, 0.25, 0.75, and 0.95 for each entry.
# Heatmap view of the estimated signatures.
SignHeat(signatures$SignExposures)
# Per-sample exposures to each signature, shown three ways:
# boxplot, barplot and heatmap.
ExposureBoxplot(signatures$SignExposures)
ExposureBarplot(signatures$SignExposures)
ExposureHeat(signatures$SignExposures)
Differential exposure analysis was performed on all samples after splitting them into two groups: Fhit- and Fhit+ (Tsg or Fhit +/+).
# Build the group label for every sample, for use as `labels` in DiffExp():
# genotypes containing "Tsg" or the literal "Fhit +/+" are labelled Fhit+,
# everything else Fhit-. The labels are then reordered to follow the columns
# of motif_count_matrix so they line up with the samples in the count matrix.
diffexp_labels <-
  sample_info %>%
  mutate(label = case_when(
    str_detect(Genotype, "Tsg") ~ "Fhit+ (Tsg or Fhit +/+)",
    # fixed() is required: as a regex, "+" is a quantifier, so the bare
    # pattern "Fhit +/+" never matches the literal genotype text and
    # Fhit +/+ samples without Tsg would fall through to "Fhit-".
    str_detect(Genotype, stringr::fixed("Fhit +/+")) ~ "Fhit+ (Tsg or Fhit +/+)",
    TRUE ~ "Fhit-"
  )) %>%
  column_to_rownames("sample_id") %>%
  {.[colnames(motif_count_matrix), ]} %>%
  pull(label)
# Run the differential exposure analysis with the PDF device open, writing
# one file per page (DiffExpPlot001.pdf, ...) into temp/ so a single page
# can be embedded afterwards.
fs::dir_create(here::here("temp"))
pdf(
  file = here::here("temp", "DiffExpPlot%03d.pdf"),
  onefile = FALSE
)
diff_exposure <- DiffExp(
  signatures$SignExposures,
  labels = diffexp_labels
)
dev.off()
## png
## 2
# Embed the first page written by the pdf() device (DiffExpPlot001.pdf).
knitr::include_graphics(here::here("temp","DiffExpPlot001.pdf"))
Differential exposure of signatures. Significant signatures are labeled with the group with the highest contribution from that signature. Here no signatures were significant.
working directory clean
Git Commit SHA: 2e4edd8371172e9aa902c50fc91e0a5e92951420
Time Stamp: 2021-February-24 17:36:40
# Record the R version, platform and package versions used for this run.
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] cowplot_1.1.0 plotly_4.9.2.1
## [3] SomaticSignatures_2.26.0 signeR_1.16.0
## [5] NMF_0.23.0 cluster_2.1.0
## [7] rngtools_1.5 pkgmaker_0.32.2
## [9] registry_0.5-1 VariantAnnotation_1.36.0
## [11] Rsamtools_2.6.0 Biostrings_2.58.0
## [13] XVector_0.30.0 SummarizedExperiment_1.20.0
## [15] Biobase_2.50.0 GenomicRanges_1.42.0
## [17] GenomeInfoDb_1.26.1 IRanges_2.24.0
## [19] S4Vectors_0.28.0 MatrixGenerics_1.2.0
## [21] matrixStats_0.57.0 BiocGenerics_0.36.0
## [23] glue_1.4.2 forcats_0.5.0
## [25] stringr_1.4.0 dplyr_1.0.2
## [27] purrr_0.3.4 readr_1.4.0
## [29] tidyr_1.1.2 tibble_3.0.4
## [31] ggplot2_3.3.2 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.1.0 RSQLite_2.2.1 AnnotationDbi_1.52.0
## [4] htmlwidgets_1.5.2 grid_4.0.3 BiocParallel_1.24.1
## [7] munsell_0.5.0 codetools_0.2-16 withr_2.3.0
## [10] colorspace_2.0-0 OrganismDbi_1.32.0 highr_0.8
## [13] knitr_1.30 rstudioapi_0.13 labeling_0.4.2
## [16] git2r_0.27.1 GenomeInfoDbData_1.2.4 bit64_4.0.5
## [19] farver_2.0.3 rprojroot_2.0.2 vctrs_0.3.5
## [22] generics_0.1.0 xfun_0.19 biovizBase_1.38.0
## [25] BiocFileCache_1.14.0 R6_2.5.0 doParallel_1.0.16
## [28] AnnotationFilter_1.14.0 bitops_1.0-6 reshape_0.8.8
## [31] DelayedArray_0.16.0 assertthat_0.2.1 scales_1.1.1
## [34] nnet_7.3-14 gtable_0.3.0 ggbio_1.38.0
## [37] ensembldb_2.14.0 rlang_0.4.9 splines_4.0.3
## [40] rtracklayer_1.50.0 lazyeval_0.2.2 dichromat_2.0-0
## [43] broom_0.7.2 checkmate_2.0.0 BiocManager_1.30.10
## [46] yaml_2.2.1 reshape2_1.4.4 modelr_0.1.8
## [49] GenomicFeatures_1.42.1 crosstalk_1.1.0.1 backports_1.2.0
## [52] Hmisc_4.4-2 RBGL_1.66.0 tools_4.0.3
## [55] gridBase_0.4-7 ellipsis_0.3.1 gplots_3.1.1
## [58] RColorBrewer_1.1-2 proxy_0.4-24 Rcpp_1.0.5
## [61] plyr_1.8.6 base64enc_0.1-3 progress_1.2.2
## [64] zlibbioc_1.36.0 RCurl_1.98-1.2 prettyunits_1.1.1
## [67] rpart_4.1-15 openssl_1.4.3 haven_2.3.1
## [70] fs_1.5.0 here_1.0.0 magrittr_2.0.1
## [73] data.table_1.13.2 reprex_0.3.0 pcaMethods_1.82.0
## [76] ProtGenerics_1.22.0 hms_0.5.3 evaluate_0.14
## [79] xtable_1.8-4 XML_3.99-0.5 jpeg_0.1-8.1
## [82] readxl_1.3.1 gridExtra_2.3 compiler_4.0.3
## [85] biomaRt_2.46.0 KernSmooth_2.23-17 crayon_1.3.4
## [88] htmltools_0.5.0 Formula_1.2-4 lubridate_1.7.9.2
## [91] DBI_1.1.0 dbplyr_2.0.0 rappdirs_0.3.1
## [94] Matrix_1.2-18 cli_2.2.0 pkgconfig_2.0.3
## [97] GenomicAlignments_1.26.0 foreign_0.8-80 xml2_1.3.2
## [100] foreach_1.5.1 PMCMR_4.3 rvest_0.3.6
## [103] digest_0.6.27 graph_1.68.0 rmarkdown_2.5
## [106] cellranger_1.1.0 htmlTable_2.1.0 curl_4.3
## [109] gtools_3.8.2 nloptr_1.2.2.2 lifecycle_0.2.0
## [112] jsonlite_1.7.1 viridisLite_0.3.0 askpass_1.1
## [115] BSgenome_1.58.0 fansi_0.4.1 pillar_1.4.7
## [118] lattice_0.20-41 GGally_2.0.0 httr_1.4.2
## [121] survival_3.2-7 png_0.1-7 iterators_1.0.13
## [124] bit_4.0.4 class_7.3-17 stringi_1.5.3
## [127] blob_1.2.1 latticeExtra_0.6-29 caTools_1.18.0
## [130] memoise_1.1.0